#include "matrix.h"
#include <cuda.h>
#include <cuda_runtime_api.h>

namespace uzu
{
    void AllocateDeviceMemory(float** buffer, const MatrixF& mat, bool copy)
    {
        uint32_t size = mat.cols * mat.rows;
        cudaMalloc((void**)buffer, size * sizeof(float));
        if (copy)
        {
            cudaMemcpy(*buffer, mat.data, size * sizeof(float), cudaMemcpyHostToDevice);
        }
    }
}
